# Packages and dependence
packageCheckClassic <- function(x){
  # 
  for( i in x ){
    if( ! require( i , character.only = TRUE ) ){
      install.packages( i , dependencies = TRUE )
      require( i , character.only = TRUE )
    }
  }
}

packageCheckClassic(c('gridExtra','DESeq2','adegenet','devtools','BiocManager','ggplot2','ggrepel','markdown','pheatmap','RColorBrewer','genefilter','gplots','vegan','dplyr','limma'))
## Loading required package: gridExtra
## Loading required package: DESeq2
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## Loading required package: adegenet
## Loading required package: ade4
## 
## Attaching package: 'ade4'
## The following object is masked from 'package:GenomicRanges':
## 
##     score
## The following object is masked from 'package:BiocGenerics':
## 
##     score
## 
##    /// adegenet 2.1.10 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
## Loading required package: devtools
## Loading required package: usethis
## Loading required package: BiocManager
## Bioconductor version '3.14' is out-of-date; the current release version '3.18'
##   is available with R version '4.3'; see https://bioconductor.org/install
## 
## Attaching package: 'BiocManager'
## The following object is masked from 'package:devtools':
## 
##     install
## Loading required package: ggplot2
## Loading required package: ggrepel
## Loading required package: markdown
## Loading required package: pheatmap
## Loading required package: RColorBrewer
## Loading required package: genefilter
## 
## Attaching package: 'genefilter'
## The following objects are masked from 'package:MatrixGenerics':
## 
##     rowSds, rowVars
## The following objects are masked from 'package:matrixStats':
## 
##     rowSds, rowVars
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
## Loading required package: vegan
## Loading required package: permute
## 
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
## 
##     check
## Loading required package: lattice
## This is vegan 2.6-4
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.1.3
## 
## Attaching package: 'limma'
## The following object is masked from 'package:DESeq2':
## 
##     plotMA
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
#BiocManager::install('tximport', force = TRUE)
#BiocManager::install('apeglm')
#BiocManager::install('ashr')
#BiocManager::install("EnhancedVolcano")
#BiocManager::install("arrayQualityMetrics")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("yanlinlin82/ggvenn")
## Skipping install of 'ggvenn' from a github remote, the SHA1 (25fd3b55) has not changed since last install.
##   Use `force = TRUE` to force installation
library("adegenet")
library('ggvenn')
## Loading required package: grid
library('tximport')
library('apeglm')
library('ashr')
library('EnhancedVolcano')
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library('pairwiseAdonis')
## Loading required package: cluster
library('BiocManager')
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## ℹ SHA-1 hash of file is "015fc0457e61e3e93a903e69a24d96d2dac7b9fb"
# Functions

trimFunction <- function(resLFC,p_adj_cut,lfc_cut) {
  resOrdered<-resLFC[order(resLFC$padj),]
  y<-na.omit(resOrdered)
  resTrim<-y[y$padj < p_adj_cut,]
  resTrim <- resTrim[ which( resTrim$log2FoldChange > lfc_cut | resTrim$log2FoldChange < -lfc_cut), ]
  resTrim <- as.data.frame(resTrim)
  resTrim$genes <- rownames(resTrim)
  return(resTrim)
}

applyAnnot <- function(inputDf,inputDfAnnot) {
  gene_and_annot_col <- character(0)
  inputDf_annot_genes <- inputDfAnnot$genes
  inputDf_annot_pfam <- inputDfAnnot$pfam_annotation
  for (i in 1:length(inputDf_annot_genes)) {
    gene_and_annot_col <- c(gene_and_annot_col, 
                            paste(inputDf_annot_genes[i], " - ", inputDf_annot_pfam[i]))
  }
  inputDf_genes <- inputDf$genes
  new_gene_annot_col <- character(0)
  for (i in inputDf_genes) {
    gene <- i
    for (j in gene_and_annot_col) {
      j_split <- strsplit(j, " ", fixed = TRUE)[[1]]
      if (i %in% j_split[1]) {
        gene <- j
      }
    }
    new_gene_annot_col <- c(new_gene_annot_col, gene)
  }
  inputDf$genes <- new_gene_annot_col
  return(inputDf)
}

heatmapFunction <- function(newColNames,commonGenes,commonGenesAll,vsd,nameCode) {
  vsdCommonGm <- vsd[commonGenes$genes,]
  vsdCommonGm@colData@rownames = newColNames
  annot_genes = commonGenesAll$genes
  genes = names(vsdCommonGm)
  
  new_names = list()
  for (i in annot_genes) {
    gene = i
    for (j in genes) {
      if (i == j) {
        gene = j
      }
    }
    new_names <- c(new_names,gene)
  }
  names(vsdCommonGm) <- new_names
  pheatmap(assay(vsdCommonGm),main=nameCode,scale="row", cluster_rows=T, show_rownames=T,
           cluster_cols=FALSE,cellwidth = 20,fontsize_row = 4)
  return(vsdCommonGm)
}

similarityFunction <- function(dataset1,dataset2) {
  c = 0
  for (i in rownames(dataset1)) {
    if (i %in% rownames(dataset2)) {
      c = c + 1
    }
  }
  sim = (c / as.integer(length(rownames(dataset2)))) * 100
  return(sim)
}


# Nov2016 dataset

# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samples<-read.table('tximport_design_preliminarySamples.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/nov2016'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/preliminarySamples/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/nov2016", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1

# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts, colData = samples,design = ~site)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10 
dds <- dds[keep,]

# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
##      [,1]           
## [1,] "Intercept"    
## [2,] "site_pv_vs_gm"
## [3,] "site_sa_vs_gm"
# General 

vsdNov2016 = vst(dds,blind=T)

pcaData = plotPCA(vsdNov2016, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "nov2016 dataset") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Common gm

res_pv_gm_lfc2_intra<-results(dds, contrast=c("site","pv","gm"), lfcThreshold = 1,alpha = 0.05)
res_pv_gm_lfc2_intra<-trimFunction(res_pv_gm_lfc2_intra,0.05,1)

res_sa_gm_lfc2_intra<-results(dds, contrast=c("site","sa","gm"), lfcThreshold = 1,alpha = 0.05)
res_sa_gm_lfc2_intra<-trimFunction(res_sa_gm_lfc2_intra,0.05,1)

common_gm_lfc2_intra <- merge(res_pv_gm_lfc2_intra,res_sa_gm_lfc2_intra,by="genes")
common_gm_annot_lfc2_intra = merge(common_gm_lfc2_intra,A_pfam,by="genes")
common_gm_all_lfc2_intra <- applyAnnot(common_gm_lfc2_intra,common_gm_annot_lfc2_intra)

newColNames = c("gm","gm","gm","gm","pv","pv","pv","sp","sp","sp")
vsd_common_gm_lfc2_intra_nov2016 = heatmapFunction(newColNames,common_gm_lfc2_intra,common_gm_all_lfc2_intra,vsdNov2016,"Nov2016 common GM genes - Intra method")

pcaData = plotPCA(vsd_common_gm_lfc2_intra_nov2016, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "nov2016 dataset - Common GM DEGs") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Common sa
res_pv_sa_lfc2_intra<-results(dds, contrast=c("site","pv","sa"), lfcThreshold = 1,alpha = 0.05)
res_pv_sa_lfc2_intra<-trimFunction(res_pv_sa_lfc2_intra,0.05,1)

res_gm_sa_lfc2_intra<-results(dds, contrast=c("site","gm","sa"), lfcThreshold = 1,alpha = 0.05)
res_gm_sa_lfc2_intra<-trimFunction(res_gm_sa_lfc2_intra,0.05,1)

common_sa_lfc2_intra <- merge(res_pv_sa_lfc2_intra,res_gm_sa_lfc2_intra,by="genes")
common_sa_annot_lfc2_intra = merge(common_sa_lfc2_intra,A_pfam,by="genes")
common_sa_all_lfc2_intra <- applyAnnot(common_sa_lfc2_intra,common_sa_annot_lfc2_intra)

newColNames = c("gm","gm","gm","gm","pv","pv","pv","sp","sp","sp")
vsd_common_sa_lfc2_intra_nov2016 = heatmapFunction(newColNames,common_sa_lfc2_intra,common_sa_all_lfc2_intra,vsdNov2016,"Nov2016 common SA genes - Intra method")

pcaData = plotPCA(vsd_common_sa_lfc2_intra_nov2016, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "nov2016 dataset - Common GM DEGs") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Common pv
res_sa_pv_lfc2_intra<-results(dds, contrast=c("site","sa","pv"), lfcThreshold = 1,alpha = 0.05)
res_sa_pv_lfc2_intra<-trimFunction(res_pv_gm_lfc2_intra,0.05,1)

res_gm_pv_lfc2_intra<-results(dds, contrast=c("site","gm","pv"), lfcThreshold = 1,alpha = 0.05)
res_gm_pv_lfc2_intra<-trimFunction(res_gm_pv_lfc2_intra,0.05,1)

common_pv_lfc2_intra <- merge(res_sa_pv_lfc2_intra,res_gm_pv_lfc2_intra,by="genes")
common_pv_annot_lfc2_intra = merge(common_pv_lfc2_intra,A_pfam,by="genes")
common_pv_all_lfc2_intra <- applyAnnot(common_pv_lfc2_intra,common_pv_annot_lfc2_intra)

newColNames = c("gm","gm","gm","gm","pv","pv","pv","sa","sa","sa")
vsd_common_pv_lfc2_intra_nov2016 = heatmapFunction(newColNames,common_pv_lfc2_intra,common_pv_all_lfc2_intra,vsdNov2016,"Nov2016 common PV genes - Intra method")

pcaData = plotPCA(vsd_common_pv_lfc2_intra_nov2016, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "nov2016 dataset - Common GM DEGs") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Inferences statistics
count_tab_assay <- assay(vsdNov2016)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samples,dist_tab_assay ~ site, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ site, data = samples, method = "euclidian")
##          Df SumOfSqs      R2      F Pr(>F)   
## site      2    15225 0.35131 1.8955  0.002 **
## Residual  7    28114 0.64869                 
## Total     9    43339 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samples$site)
## Set of permutations < 'minperm'. Generating entire set.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
##      pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
## 1 gm vs pv  1  6659.756 1.607662 0.2433027   0.032      0.096    
## 2 gm vs sa  1  8500.558 2.183983 0.3040073   0.020      0.060    
## 3 pv vs sa  1  7688.511 1.915740 0.3238377   0.100      0.300
mod <- betadisper(dist_tab_assay,samples$site)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##             diff       lwr      upr     p adj
## pv-gm -0.9652221 -19.16340 17.23295 0.9866641
## sa-gm -5.0820012 -23.28018 13.11617 0.7020122
## sa-pv -4.1167790 -23.57145 15.33789 0.8125071
# May 2018 dataset

# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesBck<-read.table('tximport_design_trueTransplant_bck.txt',header=T)
samplesTroBck<-read.table('tximport_design_trueTransplant_tro_bck.txt',header=T)
samplesTroBck2<-read.table('tximport_design_trueTransplant_tro_bck_2.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/trueTransplant/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_bck <- raw_counts[,grep("bck", colnames(raw_counts))]
raw_counts_bck_tro <- raw_counts[,grep("bck|tro", colnames(raw_counts))] 

# DDS object
ddsBck<-DESeqDataSetFromMatrix(countData = raw_counts_bck, colData = samplesBck,design = ~site)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ddsBckTro<-DESeqDataSetFromMatrix(countData = raw_counts_bck_tro, colData = samplesTroBck,design = ~site + experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ddsBckTro2<-DESeqDataSetFromMatrix(countData = raw_counts_bck_tro, colData = samplesTroBck2,design = ~originSite_finalSite_experiment) 
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(ddsBck)) >= 10 
ddsBck <- ddsBck[keep,]
keep <- rowSums(counts(ddsBckTro)) >= 10 
ddsBckTro <- ddsBckTro[keep,]
keep <- rowSums(counts(ddsBckTro2)) >= 10 
ddsBckTro2 <- ddsBckTro2[keep,]

# Differential expression analysis
ddsBck<-DESeq(ddsBck)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsBck))
##      [,1]           
## [1,] "Intercept"    
## [2,] "site_pv_vs_gm"
## [3,] "site_sp_vs_gm"
ddsBckTro<-DESeq(ddsBckTro)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsBckTro))
##      [,1]                   
## [1,] "Intercept"            
## [2,] "site_pv_vs_gm"        
## [3,] "site_sp_vs_gm"        
## [4,] "experiment_tro_vs_bck"
ddsBckTro2<-DESeq(ddsBckTro2)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsBckTro2))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_gm_gm_tro_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_pv_pv_tro_vs_gm_gm_bck"
## [5,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_gm_bck"
## [6,] "originSite_finalSite_experiment_sp_sp_tro_vs_gm_gm_bck"
# General 
vsdMay2018Bck = vst(ddsBck,blind=T)
vsdMay2018BckTro = vst(ddsBckTro,blind=T)
vsdMay2018BckTro2 = vst(ddsBckTro2,blind=T)

pcaData = plotPCA(vsdMay2018Bck, intgroup="site",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - Background") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

pcaData = plotPCA(vsdMay2018BckTro, intgroup=c("site","experiment"),returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site,shape = experiment)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
  scale_shape_manual(values = c("circle", "triangle")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - Background & transplant origin") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Common gm
res_pv_gm_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_gm_lfc2<-trimFunction(res_pv_gm_lfc2,0.05,1)

res_sp_gm_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_gm_lfc2<-trimFunction(res_sp_gm_lfc2,0.05,1)

common_gm_lfc2 <- merge(res_pv_gm_lfc2,res_sp_gm_lfc2,by="genes")
common_gm_annot_lfc2 = merge(common_gm_lfc2,A_pfam,by="genes")
common_gm_all_lfc2 <- applyAnnot(common_gm_lfc2,common_gm_annot_lfc2)

newColNames = c("gm_gm_bck","gm_gm_bck","gm_gm_bck","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","pv_pv_bck","pv_pv_bck","pv_pv_bck","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","sp_sp_bck","sp_sp_bck","sp_sp_bck","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro")
vsd_common_gm_may2018 = heatmapFunction(newColNames,common_gm_lfc2,common_gm_all_lfc2,vsdMay2018BckTro2,"May2018 common GM genes")

# Common pv
res_gm_pv_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_pv_lfc2<-trimFunction(res_gm_pv_lfc2,0.05,1)

res_sp_pv_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","sp_sp_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_pv_lfc2<-trimFunction(res_sp_pv_lfc2,0.05,1)

common_pv_lfc2 <- merge(res_gm_pv_lfc2,res_sp_pv_lfc2,by="genes")
common_pv_annot_lfc2 = merge(common_pv_lfc2,A_pfam,by="genes")
common_pv_all_lfc2 <- applyAnnot(common_pv_lfc2,common_pv_annot_lfc2)

newColNames = c("gm_gm_bck","gm_gm_bck","gm_gm_bck","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","pv_pv_bck","pv_pv_bck","pv_pv_bck","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","sp_sp_bck","sp_sp_bck","sp_sp_bck","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro")
vsd_common_pv_may2018 = heatmapFunction(newColNames,common_pv_lfc2,common_pv_all_lfc2,vsdMay2018BckTro2,"May2018 common PV genes")

# Common sp
res_gm_sp_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_sp_lfc2<-trimFunction(res_gm_sp_lfc2,0.05,1)

res_pv_sp_lfc2<-results(ddsBckTro2, contrast=c("originSite_finalSite_experiment","pv_pv_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_sp_lfc2<-trimFunction(res_pv_sp_lfc2,0.05,1)

common_sp_lfc2 <- merge(res_gm_sp_lfc2,res_pv_sp_lfc2,by="genes")
common_sp_annot_lfc2 = merge(common_sp_lfc2,A_pfam,by="genes")
common_sp_all_lfc2 <- applyAnnot(common_sp_lfc2,common_sp_annot_lfc2)

newColNames = c("gm_gm_bck","gm_gm_bck","gm_gm_bck","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","gm_gm_tro","pv_pv_bck","pv_pv_bck","pv_pv_bck","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","pv_pv_tro","sp_sp_bck","sp_sp_bck","sp_sp_bck","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro","sp_sp_tro")
vsd_common_sp_may2018 = heatmapFunction(newColNames,common_sp_lfc2,common_sp_all_lfc2,vsdMay2018BckTro2,"May2018 common SP genes")

# Inferences statistics
count_tab_assay <- assay(vsdMay2018Bck)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesBck,dist_tab_assay ~ site, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ site, data = samplesBck, method = "euclidian")
##          Df SumOfSqs      R2      F Pr(>F)   
## site      2    16020 0.37447 1.7959  0.003 **
## Residual  6    26761 0.62553                 
## Total     8    42782 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesBck$site)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
##      pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
## 1 gm vs pv  1  8099.765 1.925246 0.3249226     0.1        0.3    
## 2 gm vs sp  1  7586.838 1.760155 0.3055742     0.1        0.3    
## 3 pv vs sp  1  8344.058 1.715727 0.3001765     0.1        0.3
mod <- betadisper(dist_tab_assay,samplesBck$site)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##          diff        lwr      upr     p adj
## pv-gm 7.00318 -1.8578996 15.86426 0.1123292
## sp-gm 8.24530 -0.6157798 17.10638 0.0651578
## sp-pv 1.24212 -7.6189601 10.10320 0.9046166
count_tab_assay <- assay(vsdMay2018BckTro)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesTroBck,dist_tab_assay ~ site + experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ site + experiment, data = samplesTroBck, method = "euclidian")
##            Df SumOfSqs      R2      F Pr(>F)    
## site        2    39247 0.20226 2.7914  0.001 ***
## experiment  1     7169 0.03695 1.0198  0.365    
## Residual   21   147630 0.76080                  
## Total      24   194047 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesTroBck2$originSite_finalSite_experiment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
##                     pairs Df SumsOfSqs   F.Model        R2 p.value p.adjusted
## 1  gm_gm_bck vs gm_gm_tro  1  5932.909 1.0428908 0.1480771   0.388      1.000
## 2  gm_gm_bck vs pv_pv_bck  1 10402.253 1.8742356 0.3190603   0.100      1.000
## 3  gm_gm_bck vs pv_pv_tro  1 12721.327 1.8858460 0.2122303   0.025      0.375
## 4  gm_gm_bck vs sp_sp_bck  1  9753.032 1.7230167 0.3010679   0.100      1.000
## 5  gm_gm_bck vs sp_sp_tro  1 14682.064 1.8902571 0.2395685   0.022      0.330
## 6  gm_gm_tro vs pv_pv_bck  1 13033.488 2.1235643 0.2614080   0.021      0.315
## 7  gm_gm_tro vs pv_pv_tro  1 17898.868 2.6047513 0.2244556   0.004      0.060
## 8  gm_gm_tro vs sp_sp_bck  1 12833.151 2.0661667 0.2561522   0.020      0.300
## 9  gm_gm_tro vs sp_sp_tro  1 18502.635 2.4175319 0.2320638   0.013      0.195
## 10 pv_pv_bck vs pv_pv_tro  1  6707.946 0.9407742 0.1184739   0.542      1.000
## 11 pv_pv_bck vs sp_sp_bck  1 10716.336 1.6920331 0.2972634   0.100      1.000
## 12 pv_pv_bck vs sp_sp_tro  1 11559.789 1.4070061 0.1899561   0.107      1.000
## 13 pv_pv_tro vs sp_sp_bck  1 10597.093 1.4731946 0.1738653   0.023      0.345
## 14 pv_pv_tro vs sp_sp_tro  1 12536.034 1.5181994 0.1443402   0.035      0.525
## 15 sp_sp_bck vs sp_sp_tro  1  8377.115 1.0105796 0.1441507   0.431      1.000
##    sig
## 1     
## 2     
## 3     
## 4     
## 5     
## 6     
## 7     
## 8     
## 9     
## 10    
## 11    
## 12    
## 13    
## 14    
## 15
mod <- betadisper(dist_tab_assay,samplesTroBck2$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                           diff        lwr      upr     p adj
## gm_gm_tro-gm_gm_bck  12.644076 -12.863302 38.15145 0.6286622
## pv_pv_bck-gm_gm_bck   7.423454 -21.094661 35.94157 0.9596342
## pv_pv_tro-gm_gm_bck  21.751592  -2.945821 46.44900 0.1041111
## sp_sp_bck-gm_gm_bck   8.592794 -19.925321 37.11091 0.9273395
## sp_sp_tro-gm_gm_bck  27.534088   2.026710 53.04147 0.0299137
## pv_pv_bck-gm_gm_tro  -5.220622 -30.728000 20.28676 0.9856643
## pv_pv_tro-gm_gm_tro   9.107516 -12.042085 30.25712 0.7487049
## sp_sp_bck-gm_gm_tro  -4.051282 -29.558660 21.45610 0.9954921
## sp_sp_tro-gm_gm_tro  14.890012  -7.200025 36.98005 0.3142160
## pv_pv_tro-pv_pv_bck  14.328138 -10.369275 39.02555 0.4695593
## sp_sp_bck-pv_pv_bck   1.169340 -27.348775 29.68746 0.9999939
## sp_sp_tro-pv_pv_bck  20.110634  -5.396744 45.61801 0.1759622
## sp_sp_bck-pv_pv_tro -13.158798 -37.856210 11.53861 0.5578661
## sp_sp_tro-pv_pv_tro   5.782496 -15.367104 26.93210 0.9506257
## sp_sp_tro-sp_sp_bck  18.941294  -6.566084 44.44867 0.2243494
count_tab_assay <- assay(vsdMay2018BckTro2)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesTroBck2,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesTroBck2, method = "euclidian")
##                                 Df SumOfSqs      R2      F Pr(>F)    
## originSite_finalSite_experiment  5    60265 0.31057 1.7118  0.001 ***
## Residual                        19   133782 0.68943                  
## Total                           24   194047 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesTroBck2$originSite_finalSite_experiment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
##                     pairs Df SumsOfSqs   F.Model        R2 p.value p.adjusted
## 1  gm_gm_bck vs gm_gm_tro  1  5932.909 1.0428908 0.1480771   0.343      1.000
## 2  gm_gm_bck vs pv_pv_bck  1 10402.253 1.8742356 0.3190603   0.100      1.000
## 3  gm_gm_bck vs pv_pv_tro  1 12721.327 1.8858460 0.2122303   0.029      0.435
## 4  gm_gm_bck vs sp_sp_bck  1  9753.032 1.7230167 0.3010679   0.100      1.000
## 5  gm_gm_bck vs sp_sp_tro  1 14682.064 1.8902571 0.2395685   0.020      0.300
## 6  gm_gm_tro vs pv_pv_bck  1 13033.488 2.1235643 0.2614080   0.021      0.315
## 7  gm_gm_tro vs pv_pv_tro  1 17898.868 2.6047513 0.2244556   0.004      0.060
## 8  gm_gm_tro vs sp_sp_bck  1 12833.151 2.0661667 0.2561522   0.018      0.270
## 9  gm_gm_tro vs sp_sp_tro  1 18502.635 2.4175319 0.2320638   0.017      0.255
## 10 pv_pv_bck vs pv_pv_tro  1  6707.946 0.9407742 0.1184739   0.523      1.000
## 11 pv_pv_bck vs sp_sp_bck  1 10716.336 1.6920331 0.2972634   0.100      1.000
## 12 pv_pv_bck vs sp_sp_tro  1 11559.789 1.4070061 0.1899561   0.080      1.000
## 13 pv_pv_tro vs sp_sp_bck  1 10597.093 1.4731946 0.1738653   0.030      0.450
## 14 pv_pv_tro vs sp_sp_tro  1 12536.034 1.5181994 0.1443402   0.031      0.465
## 15 sp_sp_bck vs sp_sp_tro  1  8377.115 1.0105796 0.1441507   0.420      1.000
##    sig
## 1     
## 2     
## 3     
## 4     
## 5     
## 6     
## 7     
## 8     
## 9     
## 10    
## 11    
## 12    
## 13    
## 14    
## 15
mod <- betadisper(dist_tab_assay,samplesTroBck2$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                           diff        lwr      upr     p adj
## gm_gm_tro-gm_gm_bck  12.644076 -12.863302 38.15145 0.6286622
## pv_pv_bck-gm_gm_bck   7.423454 -21.094661 35.94157 0.9596342
## pv_pv_tro-gm_gm_bck  21.751592  -2.945821 46.44900 0.1041111
## sp_sp_bck-gm_gm_bck   8.592794 -19.925321 37.11091 0.9273395
## sp_sp_tro-gm_gm_bck  27.534088   2.026710 53.04147 0.0299137
## pv_pv_bck-gm_gm_tro  -5.220622 -30.728000 20.28676 0.9856643
## pv_pv_tro-gm_gm_tro   9.107516 -12.042085 30.25712 0.7487049
## sp_sp_bck-gm_gm_tro  -4.051282 -29.558660 21.45610 0.9954921
## sp_sp_tro-gm_gm_tro  14.890012  -7.200025 36.98005 0.3142160
## pv_pv_tro-pv_pv_bck  14.328138 -10.369275 39.02555 0.4695593
## sp_sp_bck-pv_pv_bck   1.169340 -27.348775 29.68746 0.9999939
## sp_sp_tro-pv_pv_bck  20.110634  -5.396744 45.61801 0.1759622
## sp_sp_bck-pv_pv_tro -13.158798 -37.856210 11.53861 0.5578661
## sp_sp_tro-pv_pv_tro   5.782496 -15.367104 26.93210 0.9506257
## sp_sp_tro-sp_sp_bck  18.941294  -6.566084 44.44867 0.2243494
# September 2018 dataset

# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesBck<-read.table('tximport_design_gardenShort_bck.txt',header=T)
samplesGsBck<-read.table('tximport_design_gardenShort_gs_bck.txt',header=T)
samplesGsBck2<-read.table('tximport_design_gardenShort_gs_bck_2.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/gardenShort/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_bck <- raw_counts[,grep("bck", colnames(raw_counts))]
raw_counts_bck_gs <- raw_counts[,grep("bck|gm_gm_gas|pv_pv_gas|sp_sp_gas", colnames(raw_counts))] 

# DDS object
ddsBck<-DESeqDataSetFromMatrix(countData = raw_counts_bck, colData = samplesBck,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ddsBckGs<-DESeqDataSetFromMatrix(countData = raw_counts_bck_gs, colData = samplesGsBck,design = ~site + experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
ddsBckGs2<-DESeqDataSetFromMatrix(countData = raw_counts_bck_gs, colData = samplesGsBck2,design = ~originSite_finalSite_experiment) 
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(ddsBck)) >= 10 
ddsBck <- ddsBck[keep,]
keep <- rowSums(counts(ddsBckGs)) >= 10 
ddsBckGs <- ddsBckGs[keep,]
keep <- rowSums(counts(ddsBckGs2)) >= 10 
ddsBckGs2 <- ddsBckGs2[keep,]

# Differential expression analysis
ddsBck<-DESeq(ddsBck)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsBck))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_gm_bck"
ddsBckGs<-DESeq(ddsBckGs)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 150 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(ddsBckGs))
##      [,1]                   
## [1,] "Intercept"            
## [2,] "site_pv_vs_gm"        
## [3,] "site_sp_vs_gm"        
## [4,] "experiment_gas_vs_bck"
ddsBckGs2<-DESeq(ddsBckGs2)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 73 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(ddsBckGs2))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_gm_gm_gas_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_pv_pv_gas_vs_gm_gm_bck"
## [5,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_gm_bck"
## [6,] "originSite_finalSite_experiment_sp_sp_gas_vs_gm_gm_bck"
# General 
vsdSept2018Bck = vst(ddsBck,blind=T)
vsdSept2018BckGs = vst(ddsBckGs,blind=T)
vsdSept2018BckGs2 = vst(ddsBckGs2,blind=T)

pcaData = plotPCA(vsdSept2018Bck, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - Background") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

pcaData = plotPCA(vsdSept2018BckGs, intgroup=c("site","experiment"),returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = site,shape = experiment)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
  scale_shape_manual(values = c("circle", "triangle")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - Background & garden short same site") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Common gm
res_pv_gm_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_gm_lfc2<-trimFunction(res_pv_gm_lfc2,0.05,1)

res_sp_gm_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_gm_lfc2<-trimFunction(res_sp_gm_lfc2,0.05,1)

common_gm_lfc2 <- merge(res_pv_gm_lfc2,res_sp_gm_lfc2,by="genes")
common_gm_annot_lfc2 = merge(common_gm_lfc2,A_pfam,by="genes")
common_gm_all_lfc2 <- applyAnnot(common_gm_lfc2,common_gm_annot_lfc2)

newColNames = samplesGsBck2$originSite_finalSite_experiment
vsd_common_gm_sept2018 = heatmapFunction(newColNames,common_gm_lfc2,common_gm_all_lfc2,vsdSept2018BckGs2,"Sept2018 common GM genes")

# Common pv
res_gm_pv_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_pv_lfc2<-trimFunction(res_gm_pv_lfc2,0.05,1)

res_sp_pv_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","sp_sp_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_pv_lfc2<-trimFunction(res_sp_pv_lfc2,0.05,1)

common_pv_lfc2 <- merge(res_gm_pv_lfc2,res_sp_pv_lfc2,by="genes")
common_pv_annot_lfc2 = merge(common_pv_lfc2,A_pfam,by="genes")
common_pv_all_lfc2 <- applyAnnot(common_pv_lfc2,common_pv_annot_lfc2)

newColNames = samplesGsBck2$originSite_finalSite_experiment
vsd_common_pv_sept2018 = heatmapFunction(newColNames,common_pv_lfc2,common_pv_all_lfc2,vsdSept2018BckGs2,"Sept2018 common PV genes")

# Common sp
res_gm_sp_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_sp_lfc2<-trimFunction(res_gm_sp_lfc2,0.05,1)

res_pv_sp_lfc2<-results(ddsBckGs2, contrast=c("originSite_finalSite_experiment","pv_pv_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_sp_lfc2<-trimFunction(res_pv_sp_lfc2,0.05,1)

common_sp_lfc2 <- merge(res_gm_sp_lfc2,res_pv_sp_lfc2,by="genes")
common_sp_annot_lfc2 = merge(common_sp_lfc2,A_pfam,by="genes")
common_sp_all_lfc2 <- applyAnnot(common_sp_lfc2,common_sp_annot_lfc2)

newColNames = samplesGsBck2$originSite_finalSite_experiment
vsd_common_sp_sept2018 = heatmapFunction(newColNames,common_sp_lfc2,common_sp_all_lfc2,vsdSept2018BckGs2,"Sept2018 common SP genes")

# Inferences statistics
count_tab_assay <- assay(vsdSept2018Bck)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesBck,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesBck, method = "euclidian")
##                                 Df SumOfSqs      R2      F Pr(>F)   
## originSite_finalSite_experiment  2    21353 0.33483 1.5101  0.005 **
## Residual                         6    42420 0.66517                 
## Total                            8    63773 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesBck$originSite_finalSite_experiment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
##                    pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
## 1 gm_gm_bck vs pv_pv_bck  1  10427.74 1.297030 0.2448599     0.1        0.3    
## 2 gm_gm_bck vs sp_sp_bck  1  11439.66 1.472259 0.2690405     0.1        0.3    
## 3 pv_pv_bck vs sp_sp_bck  1  10162.06 1.881833 0.3199399     0.1        0.3
mod <- betadisper(dist_tab_assay,samplesBck$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                           diff       lwr      upr     p adj
## pv_pv_bck-gm_gm_bck -20.366677 -67.75532 27.02196 0.4363563
## sp_sp_bck-gm_gm_bck -22.954352 -70.34299 24.43429 0.3611737
## sp_sp_bck-pv_pv_bck  -2.587675 -49.97631 44.80096 0.9846831
count_tab_assay <- assay(vsdSept2018BckGs2)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGsBck2,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesGsBck2, method = "euclidian")
##                                 Df SumOfSqs      R2      F Pr(>F)    
## originSite_finalSite_experiment  5    67047 0.30745 1.9534  0.001 ***
## Residual                        22   151025 0.69255                  
## Total                           27   218072 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGsBck2$originSite_finalSite_experiment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
##                     pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted
## 1  gm_gm_bck vs gm_gm_gas  1  9944.572 1.336390 0.1431378   0.055      0.825
## 2  gm_gm_bck vs pv_pv_bck  1 10485.788 1.294760 0.2445361   0.100      1.000
## 3  gm_gm_bck vs pv_pv_gas  1 18669.473 2.304333 0.2476624   0.015      0.225
## 4  gm_gm_bck vs sp_sp_bck  1 11507.405 1.468143 0.2684902   0.100      1.000
## 5  gm_gm_bck vs sp_sp_gas  1 14734.198 1.885532 0.2122025   0.018      0.270
## 6  gm_gm_gas vs pv_pv_bck  1  9948.023 1.588755 0.1656894   0.015      0.225
## 7  gm_gm_gas vs pv_pv_gas  1 16696.124 2.468151 0.1832583   0.001      0.015
## 8  gm_gm_gas vs sp_sp_bck  1 14704.232 2.398249 0.2306397   0.007      0.105
## 9  gm_gm_gas vs sp_sp_gas  1 11894.153 1.807172 0.1411062   0.009      0.135
## 10 pv_pv_bck vs pv_pv_gas  1  9372.151 1.387748 0.1654494   0.052      0.780
## 11 pv_pv_bck vs sp_sp_bck  1 10210.338 1.863755 0.3178432   0.100      1.000
## 12 pv_pv_bck vs sp_sp_gas  1  8730.180 1.350178 0.1616945   0.074      1.000
## 13 pv_pv_gas vs sp_sp_bck  1 24817.344 3.757582 0.3492962   0.017      0.255
## 14 pv_pv_gas vs sp_sp_gas  1 12937.022 1.859292 0.1567793   0.010      0.150
## 15 sp_sp_bck vs sp_sp_gas  1 13810.309 2.186195 0.2379870   0.033      0.495
##    sig
## 1     
## 2     
## 3     
## 4     
## 5     
## 6     
## 7    .
## 8     
## 9     
## 10    
## 11    
## 12    
## 13    
## 14    
## 15
mod <- betadisper(dist_tab_assay,samplesGsBck2$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                            diff        lwr       upr     p adj
## gm_gm_gas-gm_gm_bck  -7.5289472 -31.498202 16.440307 0.9199561
## pv_pv_bck-gm_gm_bck -20.2226896 -48.583494  8.138115 0.2680148
## pv_pv_gas-gm_gm_bck  -4.6719591 -29.233136 19.889218 0.9904878
## sp_sp_bck-gm_gm_bck -22.6996779 -51.060482  5.661126 0.1690619
## sp_sp_gas-gm_gm_bck  -6.8224507 -31.383628 17.738726 0.9508675
## pv_pv_bck-gm_gm_gas -12.6937424 -36.662997 11.275512 0.5765839
## pv_pv_gas-gm_gm_gas   2.8569881 -16.467643 22.181619 0.9970448
## sp_sp_bck-gm_gm_gas -15.1707307 -39.139985  8.798524 0.3887079
## sp_sp_gas-gm_gm_gas   0.7064964 -18.618134 20.031127 0.9999969
## pv_pv_gas-pv_pv_bck  15.5507305  -9.010446 40.111907 0.3883433
## sp_sp_bck-pv_pv_bck  -2.4769883 -30.837793 25.883816 0.9997668
## sp_sp_gas-pv_pv_bck  13.4002388 -11.160938 37.961416 0.5461570
## sp_sp_bck-pv_pv_gas -18.0277188 -42.588896  6.533458 0.2411619
## sp_sp_gas-pv_pv_gas  -2.1504917 -22.204609 17.903625 0.9993656
## sp_sp_gas-sp_sp_bck  15.8772271  -8.683950 40.438404 0.3664698
count_tab_assay <- assay(vsdSept2018BckGs)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGsBck,dist_tab_assay ~ site + experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ site + experiment, data = samplesGsBck, method = "euclidian")
##            Df SumOfSqs      R2      F Pr(>F)    
## site        2    33920 0.15554 2.4517  0.001 ***
## experiment  1    18126 0.08312 2.6203  0.001 ***
## Residual   24   166026 0.76133                  
## Total      27   218072 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGsBck$site)
pair.mod
##      pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
## 1 gm vs pv  1  19170.90 2.584045 0.1319464   0.001      0.003   *
## 2 gm vs sp  1  14511.28 1.934758 0.1021803   0.004      0.012   .
## 3 pv vs sp  1  17210.66 2.401285 0.1304955   0.009      0.027   .
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGsBck$experiment)
pair.mod
##        pairs Df SumsOfSqs  F.Model         R2 p.value p.adjusted sig
## 1 bck vs gas  1  17845.25 2.317261 0.08183209   0.003      0.003   *
mod <- betadisper(dist_tab_assay,samplesGsBck$site)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##            diff       lwr      upr     p adj
## pv-gm -3.510399 -18.58116 11.56036 0.8318672
## sp-gm -2.479455 -17.55021 12.59130 0.9119145
## sp-pv  1.030944 -14.43133 16.49322 0.9849188
# May2018 & Sept2018 - Background
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesBck<-read.table('tximport_design_background2018.txt',header=T)
dataPathBck<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/bck'
setwd(dataPathBck)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/bck", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
samplesOrder = c(4,5,6,13,14,15,1,2,3,10,11,12,7,8,9,16,17,18)
raw_counts <- raw_counts[,samplesOrder]

# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts, colData = samplesBck,design = ~originSite_finalSite_experiment + dataset)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10 
dds <- dds[keep,]

# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
##      [,1]                                                    
## [1,] "Intercept"                                             
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_gm_bck"
## [4,] "dataset_sept2018_vs_may2018"
# General 
vsd = vst(dds,blind=T)

pcaData = plotPCA(vsd, intgroup=c("originSite_finalSite_experiment","dataset"),returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment,shape=dataset)) + 
  geom_point(size = 5) + theme_bw() + 
  scale_color_manual(values = c("#ff4040", "#00008B","#6495ED")) +
  scale_shape_manual(values = c("circle", "triangle")) +
  geom_point() +
  ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 & sept2018 dataset - Background") +
  theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) 

# Common gm
res_pv_gm_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_gm_lfc2<-trimFunction(res_pv_gm_lfc2,0.05,1)

res_sp_gm_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_gm_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_gm_lfc2<-trimFunction(res_sp_gm_lfc2,0.05,1)

common_gm_lfc2 <- merge(res_pv_gm_lfc2,res_sp_gm_lfc2,by="genes")
common_gm_annot_lfc2 = merge(common_gm_lfc2,A_pfam,by="genes")
common_gm_all_lfc2 <- applyAnnot(common_gm_lfc2,common_gm_annot_lfc2)

newColNames = samplesBck$originSite_finalSite_experiment
vsd_common_gm = heatmapFunction(newColNames,common_gm_lfc2,common_gm_all_lfc2,vsd,"May2018 & Sept2018 common GM genes")

# Common pv
res_gm_pv_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_pv_lfc2<-trimFunction(res_gm_pv_lfc2,0.05,1)

res_sp_pv_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","pv_pv_bck"), lfcThreshold = 1,alpha = 0.05)
res_sp_pv_lfc2<-trimFunction(res_sp_pv_lfc2,0.05,1)

common_pv_lfc2 <- merge(res_gm_pv_lfc2,res_sp_pv_lfc2,by="genes")
common_pv_annot_lfc2 = merge(common_pv_lfc2,A_pfam,by="genes")
common_pv_all_lfc2 <- applyAnnot(common_pv_lfc2,common_pv_annot_lfc2)

newColNames = samplesBck$originSite_finalSite_experiment
vsd_common_pv_ = heatmapFunction(newColNames,common_pv_lfc2,common_pv_all_lfc2,vsd,"May2018 & Sept2018 common PV genes")

# Common sp
res_gm_sp_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_gm_sp_lfc2<-trimFunction(res_gm_sp_lfc2,0.05,1)

res_pv_sp_lfc2<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","sp_sp_bck"), lfcThreshold = 1,alpha = 0.05)
res_pv_sp_lfc2<-trimFunction(res_pv_sp_lfc2,0.05,1)

common_sp_lfc2 <- merge(res_gm_sp_lfc2,res_pv_sp_lfc2,by="genes")
common_sp_annot_lfc2 = merge(common_sp_lfc2,A_pfam,by="genes")
common_sp_all_lfc2 <- applyAnnot(common_sp_lfc2,common_sp_annot_lfc2)

newColNames = samplesBck$originSite_finalSite_experiment
vsd_common_sp = heatmapFunction(newColNames,common_sp_lfc2,common_sp_all_lfc2,vsd,"May2018 & Sept2018 common SP genes")

# Inferences statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesBck,dist_tab_assay ~ originSite_finalSite_experiment + dataset, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment + dataset, data = samplesBck, method = "euclidian")
##                                 Df SumOfSqs      R2      F Pr(>F)    
## originSite_finalSite_experiment  2    19376 0.19413 1.8294  0.001 ***
## dataset                          1     6294 0.06305 1.1884  0.156    
## Residual                        14    74142 0.74282                  
## Total                           17    99812 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesBck$originSite_finalSite_experiment)
pair.mod
##                    pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
## 1 pv_pv_bck vs gm_gm_bck  1 10638.019 1.956959 0.1636669   0.010      0.030   .
## 2 pv_pv_bck vs sp_sp_bck  1  9361.071 1.896908 0.1594455   0.005      0.015   .
## 3 gm_gm_bck vs sp_sp_bck  1  9064.984 1.585828 0.1368765   0.004      0.012   .
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesBck$dataset)
pair.mod
##                 pairs Df SumsOfSqs  F.Model         R2 p.value p.adjusted sig
## 1 may2018 vs sept2018  1  6293.505 1.076753 0.06305371   0.288      0.288
mod <- betadisper(dist_tab_assay,samplesBck$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                          diff       lwr      upr     p adj
## pv_pv_bck-gm_gm_bck -5.294183 -34.62293 24.03456 0.8867651
## sp_sp_bck-gm_gm_bck -1.712932 -31.04168 27.61581 0.9874031
## sp_sp_bck-pv_pv_bck  3.581252 -25.74749 32.91000 0.9462536
mod <- betadisper(dist_tab_assay,samplesBck$dataset)
mod.HSD <- TukeyHSD(mod)
mod.HSD
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                      diff       lwr      upr    p adj
## sept2018-may2018 3.760636 -10.99637 18.51764 0.596478